; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s

define dso_local <32 x half> @foo(<32 x half> %a, <32 x half> %b, <32 x half> %c) {
; CHECK-LABEL: foo:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; CHECK-NEXT:    # zmm3 = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    vfmadd213ph %zmm2, %zmm3, %zmm0
; CHECK-NEXT:    vfmadd213ph %zmm2, %zmm3, %zmm1
; CHECK-NEXT:    vaddph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call fast <32 x half> @llvm.fma.v32f16(<32 x half> %a, <32 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00>, <32 x half> %c)
  %1 = tail call fast <32 x half> @llvm.fma.v32f16(<32 x half> %b, <32 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00, half 0xHBC00>, <32 x half> %c)
  %2 = fadd <32 x half> %0, %1
  ret <32 x half> %2
}

declare <32 x half> @llvm.fma.v32f16(<32 x half>, <32 x half>, <32 x half>)
